#! /bin/bash

# Script to prepare the files for building a PCRE2 release. It does some
# processing of the documentation and detrails files.

# You must run this script before running "make dist". If its first argument
# is "doc", it stops after preparing the documentation. There are no other
# arguments. The script makes use of the following files:

# 132html     A Perl script that converts a .1 or .3 man page into HTML. It
#             "knows" the relevant troff constructs that are used in the PCRE2
#             man pages.

# CheckMan    A Perl script that checks man pages for typos in the mark up.

# CleanTxt    A Perl script that cleans up the output of "nroff -man" by
#             removing backspaces and other redundant text so as to produce
#             a readable .txt file.

# Detrail     A Perl script that removes trailing spaces from files.

# LintMan     A Perl script that lints man pages looking for inconsistencies.

# doc/index.html.src
#             A file that is copied as index.html into the doc/html directory
#             when the HTML documentation is built. It works like this so that
#             doc/html can be deleted and re-created from scratch.

# README & NON-AUTOTOOLS-BUILD
#             These files are copied into the doc/html directory, with .txt
#             extensions so that they can be hyperlinked from the HTML
#             documentation, because some people just go to the HTML without
#             looking for text files.

# Force the plain C locale: under a UTF-8 locale (such as "C.UTF-8") nroff
# turns the ASCII "HYPHEN-MINUS" into the Unicode "HYPHEN".
LANG=C
LC_ALL=C
export LANG LC_ALL

# Build the release string from the pcre2_major, pcre2_minor and
# pcre2_prerelease m4_define lines of configure.ac: keep the bracketed value
# of each line, append "." to the first one, then drop the brackets and the
# newlines so that the pieces are joined into a single word.
CURRENT_RELEASE=$(
  grep -E 'm4_define\(pcre2_(major|minor|prerelease)' configure.ac |
    grep -E -o '\[.*\]' |
    sed -E -e '1s/$/./' |
    tr -d '[]\n'
)
export CURRENT_RELEASE

# First, sort out the documentation. The documentation steps all run from
# inside the doc directory, so stop at once if it cannot be entered: carrying
# on would make this and later file removals act on whatever directory the
# script was started from.

cd doc || { echo "Cannot change to the doc directory" >&2; exit 1; }
echo Processing documentation

# Remove pcre2demo.3 first because it won't pass the markup check (it is
# created below, using markup that none of the other pages use).

/bin/rm -f pcre2demo.3

# Check the remaining man pages: the markup checker sees every .1 and .3
# page, the linter sees the .3 pages only. Give up as soon as either tool
# reports failure.

perl ../maint/CheckMan *.1 *.3 || exit 1

perl ../maint/LintMan -v *.3 || exit 1

# Verify the version number in the man pages: each page must have a .TH line
# that carries "PCRE2 <current release>" in double quotes.

th_release_re=".TH.*\"PCRE2 $CURRENT_RELEASE\""
for page in *.1 *.3 ; do
  grep -qE "$th_release_re" "$page" && continue
  echo "Version number in $page does not match current release"
  exit 1
done

# Make Text form of the documentation. It needs some mangling to make it
# tidy for online reading. pcre2.txt starts with a fixed banner and then
# receives the cleaned-up nroff output of the listed .3 pages; the individual
# function pages are not in the list.

cat >pcre2.txt <<'End'
-----------------------------------------------------------------------------
This file contains a concatenation of the PCRE2 man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. Neither has
the pcre2demo program. There are separate text files for the pcre2grep and
pcre2test commands.
-----------------------------------------------------------------------------


End

echo "Making pcre2.txt"
txt_pages=(
  pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit
  pcre2limits pcre2matching pcre2partial pcre2pattern pcre2perform
  pcre2posix pcre2sample pcre2serialize pcre2syntax
  pcre2unicode
)
for page in "${txt_pages[@]}" ; do
  echo "  Processing $page.3"
  # Format the page, clean the result, and append it to pcre2.txt.
  nroff -c -man "$page.3" >"$page.rawtxt"
  perl ../maint/CleanTxt <"$page.rawtxt" >>pcre2.txt
  /bin/rm "$page.rawtxt"
  printf '%s\n' "------------------------------------------------------------------------------" >>pcre2.txt
  # Two empty lines follow the rule for every page except pcre2sample.
  case "$page" in
    pcre2sample) ;;
    *) printf '\n\n' >>pcre2.txt ;;
  esac
done

# The three commands: each .1 page is formatted with nroff and cleaned up
# into a .txt file of its own.
for cmd_page in pcre2test pcre2grep pcre2-config ; do
  echo "Making $cmd_page.txt"
  nroff -c -man "$cmd_page.1" >"$cmd_page.rawtxt"
  perl ../maint/CleanTxt <"$cmd_page.rawtxt" >"$cmd_page.txt"
  /bin/rm "$cmd_page.rawtxt"
done


# Make pcre2demo.3 from the pcre2demo.c source file. The Perl program below is
# passed on standard input via a here-document whose delimiter is quoted, so
# the shell performs no expansion on it. It writes a .TH header that carries
# the date of the last git commit touching ../src/pcre2demo.c (as reported by
# "git log") and the version taken from the exported CURRENT_RELEASE variable,
# then definitions of the EX and EE macros, and finally the C source itself
# between .EX and .EE, with every backslash replaced by "\e".
# NOTE(review): the output file is opened twice - once by the shell
# redirection and once by the Perl open() - both in truncating mode, and Perl
# prints only to its own handle. Confirm whether the redirection is needed.

echo "Making pcre2demo.3"
perl <<"END" >pcre2demo.3
  open(IN, "<", "../src/pcre2demo.c") or die "Failed to open src/pcre2demo.c";
  open(OUT, ">", "pcre2demo.3") or die "Failed to create pcre2demo.3";
  my $t = `git log -n1 --date=format:"%d %B %Y" --format=%cd ../src/pcre2demo.c`;
  chomp $t;
  my $version = $ENV{CURRENT_RELEASE};
  print OUT ".TH PCRE2DEMO 3 \"", $t, '" "PCRE2 ', $version, "\"\n" .
            ".\\\"AUTOMATICALLY GENERATED BY UpdateAlways - do not EDIT!\n" .
            ".SH NAME\n" .
            "PCRE2DEMO - A demonstration C program for PCRE2\n" .
            ".SH \"SOURCE CODE\"\n" .
            ".rs\n" .
            ".sp\n" .
            ".\\\" Start example.\n" .
            ".de EX\n" .
	    ".	do ds mF \\\\n[.fam]\n" .
            ".  nr mE \\\\n(.f\n" .
            ".  nf\n" .
            ".  nh\n" .
	    ".	do fam C\n" .
            ".  ft CW\n" .
            "..\n" .
            ".\n" .
            ".\n" .
            ".\\\" End example.\n" .
            ".de EE\n" .
	    ".	do fam \\\\*(mF\n" .
            ".  ft \\\\n(mE\n" .
            ".  fi\n" .
            ".  hy \\\\n(HY\n" .
            "..\n" .
            ".\n" .
            ".RS -7\n" .
            ".EX\n" ;
  while (<IN>)
    {
    s/\\/\\e/g;
    print OUT;
    }
  print OUT ".EE\n";
  close(OUT);
  close(IN);
END
# Stop if Perl exited with a failure status (for example because one of the
# open() calls above died).
if [ $? != 0 ] ; then exit 1; fi


# Verify that `man` can process the pages without warnings. The check is
# only attempted when the output of "man --help" mentions "warnings". When it
# does not, the check is skipped: with an error annotation if both CI and
# GITHUB_REPOSITORY are set, with a plain warning otherwise.
if ! man --help 2>/dev/null | grep -q warnings ; then
  if [ -n "$CI" ] && [ -n "$GITHUB_REPOSITORY" ] ; then
    echo "::error title=UpdateAlways::man-db not installed in worker image"
  else
    echo "Warning: skipping man-db warnings checking"
  fi
else
  for page in *.1 *.3 ; do
    # Keep only what man writes to stderr; its formatted output is discarded.
    man_warnings=$(MANROFFSEQ='' MANWIDTH=80 man --warnings=w,all -E UTF-8 -l -Tutf8 -Z "$page" 2>&1 >/dev/null)
    [ -z "$man_warnings" ] && continue
    printf "Running man generated warnings:\n%s\n" "$man_warnings"
    exit 1
  done
fi


# Make HTML form of the documentation. The html directory is created when it
# is missing, so that it really can be deleted and rebuilt from scratch, and
# is emptied when it already exists (-f keeps rm quiet if there is nothing to
# remove). The index page and the two text files that the HTML links to are
# then copied in; a failed copy stops the script instead of leaving an
# incomplete html directory behind.

echo "Making HTML documentation"
mkdir -p html || exit 1
/bin/rm -f html/*
cp index.html.src html/index.html || exit 1
cp ../README html/README.txt || exit 1
cp ../NON-AUTOTOOLS-BUILD html/NON-AUTOTOOLS-BUILD.txt || exit 1

# Convert every section 1 page to HTML, always with a table of contents.
for man1 in *.1 ; do
  stem=${man1%.1}
  echo "  Making $stem.html"
  perl ../maint/132html -toc "$stem" <"$man1" >"html/$stem.html" || exit 1
done

# Convert every section 3 page to HTML. A table of contents is requested by
# default, but not for the function summaries (the pages with an underscore
# in their name) nor for the small pages that have only one section.

for man3 in *.3 ; do
  stem=${man3%.3}
  toc_args=(-toc)
  case "$stem" in
    *_* | pcre2sample | pcre2compat | pcre2demo | pcre2limits | pcre2unicode)
      toc_args=()
      ;;
  esac
  echo "  Making $stem.html"
  perl ../maint/132html "${toc_args[@]}" "$stem" <"$man3" >"html/$stem.html" || exit 1
done

# End of documentation processing: go back up to the top-level directory.
# When the first argument is "doc" nothing more is wanted, so leave here
# with a success status.

cd ..
echo Documentation done
case "$1" in
  doc) exit 0 ;;
esac

# The text files that get detrailed. Test data is left out because trailing
# spaces there may be significant. RunTest.bat is left out because it has
# CRLF line endings and the detrail script removes all trailing white space.
# The configure files are also omitted from the detrailing. The list is built
# up section by section; the order of the entries is kept as it always was.

# Files in the top-level directory.
txt_files=(
  AUTHORS.md BUILD.bazel CMakeLists.txt COPYING ChangeLog HACKING INSTALL
  LICENCE.md MODULE.bazel Makefile.am NEWS NON-AUTOTOOLS-BUILD README
  RunGrepTest RunTest SECURITY.md build.zig configure.ac
  libpcre2-8.pc.in libpcre2-16.pc.in libpcre2-32.pc.in libpcre2-posix.pc.in
  pcre2-config.in perltest.sh
)

# Files under cmake/.
txt_files+=( cmake/COPYING-CMAKE-SCRIPTS cmake/{*.cmake,*.cmake.in} )

# Files under m4/.
txt_files+=(
  m4/ax_check_vscript.m4 m4/ax_pthread.m4 m4/pcre2_visibility.m4
  m4/pcre2_zos.m4
)

# Documentation sources and the generated HTML.
txt_files+=( doc/p* doc/html/* )

# Symbol files under src/.
txt_files+=( src/libpcre2-*.sym )

# Files handed to the text checker with its -crlf option.
crlf_files=( RunGrepTest.bat RunTest.bat )

# C sources, headers and header templates, all of which live under src/.
# Only the names within src/ are spelled out here; the directory prefix is
# added to every entry in one go below. The order of the entries is kept.
c_basenames=(
  config-cmake.h.in
  pcre2.h.in
  pcre2_auto_possess.c
  pcre2_chartables.c.dist
  pcre2_chartables.c.ebcdic-1047-nl15
  pcre2_chartables.c.ebcdic-1047-nl25
  pcre2_chkdint.c
  pcre2_compile.c
  pcre2_compile.h
  pcre2_compile_cgroup.c
  pcre2_compile_class.c
  pcre2_config.c
  pcre2_context.c
  pcre2_convert.c
  pcre2_dfa_match.c
  pcre2_dftables.c
  pcre2_error.c
  pcre2_extuni.c
  pcre2_find_bracket.c
  pcre2_fuzzsupport.c
  pcre2_internal.h
  pcre2_intmodedep.h
  pcre2_jit_char_inc.h
  pcre2_jit_compile.c
  pcre2_jit_match_inc.h
  pcre2_jit_misc_inc.h
  pcre2_jit_simd_inc.h
  pcre2_jit_test.c
  pcre2_maketables.c
  pcre2_match.c
  pcre2_match_data.c
  pcre2_match_next.c
  pcre2_newline.c
  pcre2_ord2utf.c
  pcre2_pattern_info.c
  pcre2_printint_inc.h
  pcre2_script_run.c
  pcre2_serialize.c
  pcre2_string_utils.c
  pcre2_study.c
  pcre2_substitute.c
  pcre2_substring.c
  pcre2_tables.c
  pcre2_ucd.c
  pcre2_ucp.h
  pcre2_ucptables_inc.h
  pcre2_util.h
  pcre2_valid_utf.c
  pcre2_xclass.c
  pcre2demo.c
  pcre2grep.c
  pcre2posix.c
  pcre2posix.h
  pcre2posix_test.c
  pcre2test.c
  pcre2test_inc.h
)

# An empty pattern anchored at the start ("#") matches before the first
# character, so this substitution prepends "src/" to each element.
c_files=( "${c_basenames[@]/#/src/}" )

# Strip trailing white space from the text files and the C files in a single
# Detrail run.
echo Detrailing
detrail_list=( "${txt_files[@]}" "${c_files[@]}" )
perl maint/Detrail "${detrail_list[@]}"

# Run the text checker over each list with the option that applies to it:
# none for the text files, -ascii for the C files, -crlf for the batch files.
echo Validating all text
perl maint/CheckTxt "${txt_files[@]}"
perl maint/CheckTxt -ascii "${c_files[@]}"
perl maint/CheckTxt -crlf "${crlf_files[@]}"

# Verify the version number in the Bazel file: MODULE.bazel must contain a
# version assignment that names the current release. Report completion only
# when it does.
bazel_version_re="version = \"$CURRENT_RELEASE\""
if grep -qE "$bazel_version_re" MODULE.bazel ; then
  echo Done
else
  echo "Version number in MODULE.bazel does not match current release"
  exit 1
fi

#End
